# ===============================================================#
#                     Replication files for:                     #
#.  "Attitudinal and Behavioral Legacies of Wartime Violence:    #
#                      A Meta-Analysis"                          #
#                        Joan Barceló                            #
#               American Political Science Review                #
#               Last update: September 3, 2025                   #
# ===============================================================#

#################################
# Appendix F: Moderator Analysis
################################

## ------------------------ Load Packages ------------------------

library(readr)
library(dplyr)
library(stringr)
library(purrr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(metafor)
library(broom)
library(knitr)

# ---------- Paths and file maps ----------

BASE <- "~/Datasets/"

# Outcome -> filename maps

file_map <- c(
  meta.groups        = "meta.groups.csv",
  meta.turnout       = "meta.turnout.csv",
  meta.trust         = "meta.trust.csv",
  meta.participation = "meta.participation.csv",
  meta.interest      = "meta.interest.csv",
  meta.games         = "meta.normative_games.csv",
  meta.leadership    = "meta.leadership.csv",
  meta.altruism      = "meta.altruism.csv",
  meta.warenemies    = "meta.warenemies.csv",
  meta.threat        = "meta.threat.csv",
  meta.intergroup    = "meta.intergroup.csv",
  meta.ingroup_trust = "meta.ingroup_trust.csv",
  meta.groupvoting   = "meta.groupvoting.csv",
  meta.groupid       = "meta.groupid.csv",
  meta.polintolerance= "meta.polintol.csv",
  meta.security      = "meta.security.csv",
  meta.justice       = "meta.punitive_justice.csv",
  meta.xtr_ideology  = "meta.xtr_ideology.csv",
  meta.autho         = "meta.autho.csv",
  meta.socintolerance= "meta.socintol.csv",
  meta.conflict      = "meta.peace.csv",
  meta.instmistrust  = "meta.institutional.csv"
)

## ------------------------ Load data ------------------------

load_csv_safe <- function(fname) {
  readr::read_csv(file.path(base_path, fname), show_col_types = FALSE, progress = FALSE)
}

datasets <- file_map %>%
  imap(~ load_csv_safe(.x)) %>%
  set_names(names(file_map))

assign_conflict_cleavage <- function(df) {
  df %>%
    mutate(
      cleavage = case_when(
        UCDPConflictCleavage == "1" ~ "Ethnic",
        UCDPConflictCleavage == "2" ~ "Ideological",
        ConflictStudy %in% c("Yugoslavian Wars", "Post-election violence",
                             "Turkish sieges of Vienna", "POLY", "Greco-Turkish War") ~ "Ethnic",
        TRUE ~ NA_character_
      ),
      cleavage = coalesce(cleavage, "Ideological")
    )
}

assign_exposure_type <- function(df) {
  df %>%
    mutate(
      ExposureTreat = if_else(str_detect(exptype2 %||% "", "o"), "Objective", "Self-reported"),
      ExposureTreat = if_else(str_detect(exptype %||% "", "N"), ExposureTreat, "Objective")
    )
}

recode_unit_analysis <- function(df) {
  df %>%
    mutate(
      individual = case_when(
        str_detect(UnitAnalysis %||% "", regex("Individual|Inidividual|Inividual|Household", ignore_case = TRUE)) ~ 1L,
        TRUE ~ 0L
      ),
      aggregate = case_when(
        str_detect(UnitAnalysis %||% "", regex("Chiefdom|Community|Locality|District|Municipality|Municipalities|Municipailty|Neighbourhood|Village|Hamlet|County", ignore_case = TRUE)) ~ 1L,
        str_detect(UnitAnalysis %||% "", regex("cluster", ignore_case = TRUE)) ~ 0L,
        TRUE ~ 0L
      ),
      aggregate = if_else(individual == 1L, 0L, aggregate)
    )
}

define_exposure_lag <- function(df) {
  df %>%
    mutate(
      ExposureLagMean = suppressWarnings(as.numeric(ExposureLagMean)),
      ExposureLagCategory = cut(
        ExposureLagMean,
        breaks = c(-Inf, 5, 20, Inf),
        labels = c("Recent", "Medium", "Long")
      )
    )
}

create_design_quality <- function(df) {
  safe_to_numeric <- function(x) {
    xx <- tolower(as.character(x))
    case_when(
      xx %in% c("y", "yes", "1") ~ 1,
      xx %in% c("n", "no", "0")  ~ 0,
      TRUE ~ NA_real_
    )
  }
  
  # accept either used_design or uesd_design
  used_flag <- function(nm) nm %in% names(df) && is.character(df[[nm]]) && any(!is.na(df[[nm]]))
  
  has_used  <- used_flag("used_design")
  has_uesd  <- used_flag("uesd_design")
  
  df %>%
    mutate(
      RandomArgument = if_else(
        (if ("did_design" %in% names(.)) did_design == "Y" else FALSE) |
          (if (has_used) used_design == "Y" else FALSE) |
          (if (has_uesd) uesd_design == "Y" else FALSE) |
          (if ("iv_design" %in% names(.)) iv_design == "Y" else FALSE) |
          (if ("rd_design" %in% names(.)) rd_design == "Y" else FALSE) |
          (if ("random_design" %in% names(.)) random_design == "Y" else FALSE) |
          (if ("exptype" %in% names(.)) exptype == "Y" else FALSE),
        1L, 0L
      ),
      SensitivityAnalysis  = if ("SensitivityAnalysis"  %in% names(.)) safe_to_numeric(SensitivityAnalysis)  else NA_real_,
      ControlSelectionBias = if ("ControlSelectionBias" %in% names(.)) safe_to_numeric(ControlSelectionBias) else NA_real_,
      ControlMigrationBias = if ("ControlMigrationBias" %in% names(.)) safe_to_numeric(ControlMigrationBias) else NA_real_,
      ControlSurvivalBias  = if ("ControlSurvivalBias"  %in% names(.)) safe_to_numeric(ControlSurvivalBias)  else NA_real_,
      quality_measures = rowSums(across(c(RandomArgument, SensitivityAnalysis,
                                          ControlSelectionBias, ControlMigrationBias,
                                          ControlSurvivalBias)), na.rm = TRUE),
      high_quality_measures = if_else(quality_measures > 3, "High", "Low")
    )
}

datasets <- datasets %>%
  map(assign_conflict_cleavage) %>%
  map(assign_exposure_type) %>%
  map(recode_unit_analysis) %>%
  map(define_exposure_lag) %>%
  map(create_design_quality)

create_meta_analysis_dataset <- function(df) {
  df %>%
    mutate(
      Effect            = as.numeric(coef),
      Variance          = as.numeric(se)^2,
      ExposureLagCategory = as.factor(ExposureLagCategory),
      ExposureType        = as.factor(ExposureTreat),
      ConflictCleavage    = as.factor(cleavage),
      DesignQuality       = as.factor(high_quality_measures),
      UnitAnalysis        = as.factor(aggregate),
      AuthorYear          = authoryear %||% NA_character_,
      StudyID             = as.character(seq_len(n()))
    ) %>%
    select(Effect, Variance, ExposureLagCategory, ExposureType, ConflictCleavage,
           DesignQuality, UnitAnalysis, AuthorYear, StudyID)
}

datasets <- datasets %>% map(create_meta_analysis_dataset)

datasets <- datasets %>%
  map(~ mutate(.x, DesignQuality = relevel(DesignQuality, ref = "Low")))

run_three_level_with_moderator <- function(df, moderator_vec) {
  if (!"obs_id" %in% names(df)) df$obs_id <- seq_len(nrow(df))
  
  if (length(moderator_vec) == 1) {
    moderator <- moderator_vec
    if (!(moderator %in% names(df))) return(NA)
    if (length(unique(na.omit(df[[moderator]]))) <= 1) return(NA)
    mod_formula <- as.formula(paste("~", moderator))
  } else {
    keep_mods <- moderator_vec[sapply(moderator_vec, function(mod) {
      mod %in% names(df) && length(unique(na.omit(df[[mod]]))) > 1
    })]
    if (length(keep_mods) == 0) return(NA)
    mod_formula <- as.formula(paste("~", paste(keep_mods, collapse = " + ")))
  }
  
  tryCatch(
    rma.mv(
      yi = Effect,
      V = Variance,
      mods = mod_formula,
      random = ~ 1 | AuthorYear / StudyID,
      data = df,
      test = "t"
    ),
    error = function(e) NA
  )
}

moderators <- c("ExposureType", "ConflictCleavage", "DesignQuality", "ExposureLagCategory", "UnitAnalysis")

model_results <- map(names(datasets), function(dataset_name) {
  df <- datasets[[dataset_name]]
  mods_list <- map(moderators, ~ run_three_level_with_moderator(df, .x)) %>%
    set_names(moderators)
  mods_list$AllModerators <- run_three_level_with_moderator(df, moderators)
  mods_list
}) %>% set_names(names(datasets))

extract_allmoderator_results <- function(model_list) {
  map_dfr(names(model_list), function(outcome_name) {
    model <- model_list[[outcome_name]][["AllModerators"]]
    if (inherits(model, "rma.mv")) {
      out <- broom::tidy(model, conf.int = TRUE)
      out$Outcome <- outcome_name
      out
    } else {
      NULL
    }
  })
}

all_coeffs <- extract_allmoderator_results(model_results)

format_coef_se_latex <- function(est, se, pval) {
  stars <- ifelse(pval < 0.001, "***",
                  ifelse(pval < 0.01,  "**",
                         ifelse(pval < 0.05,  "*",
                                ifelse(pval < 0.1,   ".", ""))))
  sprintf("%.3f%s \\\\ (%.3f)", est, stars, se)
}

formatted_table <- all_coeffs %>%
  mutate(
    term = sub(".*\\.", "", term),
    formatted = format_coef_se_latex(estimate, std.error, p.value)
  ) %>%
  select(Outcome, term, formatted) %>%
  pivot_wider(names_from = Outcome, values_from = formatted) %>%
  relocate(term)

study_counts <- map_dfr(datasets, function(df) {
  tibble(
    k = sprintf("%d \\\\ (k)", nrow(df)),
    N = sprintf("%d \\\\ (N)", dplyr::n_distinct(df$AuthorYear))
  )
}, .id = "Outcome") %>%
  pivot_longer(cols = c(k, N), names_to = "term", values_to = "formatted") %>%
  pivot_wider(names_from = Outcome, values_from = formatted) %>%
  relocate(term)

formatted_table <- bind_rows(formatted_table, study_counts)

all_cols <- names(formatted_table)
term_col <- "term"
outcome_cols <- setdiff(all_cols, term_col)
chunk_size <- 7
chunks <- split(outcome_cols, ceiling(seq_along(outcome_cols) / chunk_size))

for (i in seq_along(chunks)) {
  tbl <- formatted_table %>%
    select(any_of(c(term_col, chunks[[i]])))
  caption <- sprintf("Moderator Coefficients by Outcome (%d/%d)", i, length(chunks))
  kable(tbl, format = "latex", booktabs = TRUE, escape = FALSE, caption = caption) %>% cat()
}

